home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Skunkware 5
/
Skunkware 5.iso
/
src
/
Tools
/
glimpse-2.1
/
index
/
dir.c
< prev
next >
Wrap
C/C++ Source or Header
|
1995-05-16
|
15KB
|
517 lines
/* Copyright (c) 1994 Sun Wu, Udi Manber, Burra Gopal. All Rights Reserved. */
/* ./glimpse/index/dir.c */
/* The function of this program is to traverse the
directory tree and print the size of the files in the tree.
This program is derived from the C programming language book.
It opens a directory using the opendir() call, and uses readdir()
to read each entry of the directory.
*/
#include <stdio.h>
#include <sys/types.h>
#if DIRENT
#include <dirent.h>
#else /*DIRENT*/
#include <sys/dir.h>
#endif /*DIRENT*/
#include <sys/stat.h>
#include <fcntl.h>
/* Path-buffer limit assumed by the "name too long" check in fsize_directory(). */
#define BUFSIZE 256
/* Space reserved for one directory-entry name in that same check. */
#define DIRSIZE 14
#include "glimpse.h"
/* Replace any earlier MAX_LIST (presumably from glimpse.h) with the value used to size name_list below. */
#undef MAX_LIST
#define MAX_LIST 100000
#if BG_DEBUG
extern FILE *LOGFILE;
#endif /*BG_DEBUG*/
extern FILE *MESSAGEFILE;
/* Next free slot in name_list/size_list on the fresh-indexing and BuildDictionaryExisting paths of fsize(). */
int ndx = 0; /* file index */
/* Everything declared extern below is defined in another source file. */
extern char *name_list[MAX_LIST]; /* store the file names */
extern int *size_list; /* store the sizes of the files */
extern int disable_list[FILEMASK_SIZE]; /* store whether to DISABLE indexing or not */
extern int file_num;
extern int file_id; /* borrowed from filetype.c */
extern char INDEX_DIR[MAX_LINE_LEN];
extern int AddToIndex;
extern int FastIndex;
extern int OneFilePerBlock;
extern int IncludeHigherPriority;
extern int BuildDictionaryExisting;
extern int printed_warning;
extern int p_table[];
extern FILE *STATFILE;
extern int IndexableFile;
extern int files_per_partition;
extern int new_partition;
extern int files_in_partition;
extern struct stat istbuf; /* imported from glimpse.c */
extern int memory_usage;
extern int mask_int[];
extern char exin_argv[8];
extern int exin_argc;
extern char current_dir_buf[2*MAX_LINE_LEN + 4]; /* must have space to store pattern after directory name */
extern unsigned char dummypat[MAX_PAT];
extern int dummylen;
extern FILE *dummyout;
/* ctimes of these two are compared with istbuf's in fsize() to decide whether exclude/include processing can be skipped. */
extern struct stat excstbuf;
extern struct stat incstbuf;
/* Filter patterns; fsize() treats a match as a reason to force inclusion of a file. */
extern int num_filter;
extern int filter_len[MAX_FILTER];
extern CHAR *filter[MAX_FILTER];
extern CHAR *filter_command[MAX_FILTER];
/*
* Exclude/Include priorities with exclude > include (IncludeHigherPriority = OFF = default):
* 1. Command line arguments (inclusion --> exclude list is never applied)
* 2. Exclude list (exclusion)
* 3. Include list (inclusion)
* 5. Symbolic links (exclusion --> applying exclude list is unnecessary)
* 4. filetype (exclusion)
*
* Exclude/Include priorities with include > exclude (IncludeHigherPriority = ON = -i):
* 1. Command line arguments (inclusion --> exclude list is never applied)
* 2. Include list (inclusion)
* 3. Symbolic links (exclusion --> applying exclude list is unnecessary)
* 3. Exclude list (exclusion)
* 4. filetype (exclusion)
*/
/* Receives the output file name from tuncompress_file() in fsize(). */
char outname[MAX_LINE_LEN];
/* Not referenced in the visible part of this file. */
char inname[MAX_LINE_LEN];
/*
 * fsize: decide whether `name' is to be indexed and, if so, record it.
 *
 * name      path to examine.  The buffer must be writable: its terminating
 *           '\0' is temporarily replaced by '\n' around memagrep_search()
 *           calls, and fsize_directory() appends entry names to it in place.
 * pat, pat_len, num_pat
 *           exclude patterns.  pat_len[i] > 0 means a plain substring that is
 *           matched with strstr(); otherwise -pat_len[i] is handed to
 *           memagrep_search() as the pattern length.
 * inc, inc_len, num_inc
 *           include patterns, encoded the same way.
 * toplevel  non-zero makes the exclude list a no-op and forces inclusion;
 *           fsize_directory() always passes 0.
 *
 * Directories are descended into through fsize_directory(); regular files
 * are entered into name_list/size_list (and the filename hash table when
 * AddToIndex or FastIndex is set), or just printed when IndexableFile is set.
 *
 * Returns -1 when the file table is full (MAXNUM_FILE), when the partition
 * table overflows outside AddToIndex mode, or when fsize_directory() returns
 * -1; returns 0 in every other case, including "file skipped".
 *
 * NOTE(review): memagrep_search() is run over current_dir_buf, not over
 * `name' -- presumably `name' points into that buffer; confirm in the caller.
 * NOTE(review): the ".*" shortcut for filters tests filter_len[i] == -1 while
 * the exclude/include shortcuts test -2; confirm which one is intended.
 */
int
fsize(name, pat, pat_len, num_pat, inc, inc_len, num_inc, toplevel)
char *name;
char **pat;
int *pat_len;
int num_pat;
char **inc;
int *inc_len;
int num_inc;
int toplevel;
{
	struct stat stbuf;
	int i;
	int fileindex = -1;
	int force_include = 0;
	int len_current_dir_buf = strlen(current_dir_buf) + 1; /* includes the '\0' which is going to be replaced by '\n' below */
	int name_len;

	name_len = strlen(name); /* name[name_len] is '\0' */
#ifdef SW_DEBUG
	printf("num_pat= %d num_inc= %d\n", num_pat, num_inc);
	printf("name= %s\n", name);
#endif

	/*
	 * Find out what to exclude, what to include and skip
	 * over symbolic links that don't HAVE to be included.
	 * Some Extra get_filename_index calls are done but
	 * that won't cost you anything (just #ing twice).
	 */

	/* Check if cache set in glimpse.c is correct */
	if (!IndexableFile && FastIndex &&
	    ((fileindex = get_filename_index(name, name_list, file_num)) != -1) &&
	    (disable_list[block2index(fileindex)] & mask_int[fileindex % (8*sizeof(int))])) {
		if (num_pat <= 0) {
			if (num_inc <= 0) return 0;
			else if (incstbuf.st_ctime <= istbuf.st_ctime) return 0;
		}
		else {
			if (num_inc <= 0) {
				if (excstbuf.st_ctime <= istbuf.st_ctime) return 0;
			}
			else if ((excstbuf.st_ctime <= istbuf.st_ctime) && (incstbuf.st_ctime <= istbuf.st_ctime)) return 0;
		}
	}

/* Drop `name' from the existing index (if it is there) before giving up on it. */
#define PROCESS_EXIT \
{\
	if (AddToIndex || FastIndex) {\
		if ((fileindex = get_filename_index(name, name_list, file_num)) != -1) \
			remove_filename(fileindex, new_partition);\
	}\
}

/* Return 0 from fsize() if any exclude pattern matches; never applied to toplevel names. */
#define PROCESS_EXCLUDE \
{\
	if (!toplevel) for(i=0; i<num_pat; i++) { /* bg: 15/mar/94 */\
		if (pat_len[i] > 0) {\
			name[name_len] = '\0';\
			if (strstr(name, pat[i]) != NULL) {\
				PROCESS_EXIT;\
				return 0;\
			}\
		}\
		else { /* must call memagrep */\
			int ret;\
			name[name_len] = '\n'; /* memagrep wants names to end with '\n': '\0' is not necessary */\
			if (((pat_len[i] == -2) && (pat[i][0] == '.') && (pat[i][1] == '*')) ||\
			    ((ret = memagrep_search(-pat_len[i], pat[i], len_current_dir_buf, current_dir_buf, 0, dummyout)) > 0))\
			{\
				name[name_len] = '\0'; /* restore */\
				PROCESS_EXIT;\
				return 0;\
			}\
		}\
	}\
	name[name_len] = '\0';\
}

/* Set force_include if an include pattern or a filter pattern matches, or if toplevel. */
#define PROCESS_INCLUDE \
{\
	/*\
	 * When include has higher priority, we want to include directories\
	 * by default and match the include patterns only against filenames.\
	 * Based on bug reports for glimpse-2.1. bg: 2/mar/95.\
	 */\
	if (IncludeHigherPriority && ((stbuf.st_mode & S_IFMT) == S_IFDIR)) force_include = 1;\
	else for (i=0; i<num_inc; i++) { /* bg: 15/mar/94 */\
		if (inc_len[i] > 0) {\
			name[name_len] = '\0';\
			if (strstr(name, inc[i]) != NULL) {\
				force_include = 1;\
				break;\
			}\
		}\
		else { /* must call memagrep */\
			name[name_len] = '\n'; /* memagrep wants names to end with '\n': '\0' is not necessary */\
			if (((inc_len[i] == -2) && (inc[i][0] == '.') && (inc[i][1] == '*')) ||\
			    (memagrep_search(-inc_len[i], inc[i], len_current_dir_buf, current_dir_buf, 0, dummyout) > 0))\
			{\
				force_include = 1;\
				break;\
			}\
		}\
	}\
	if (!force_include) for (i=0; i<num_filter; i++) { /* bg: 16/sep/94 */\
		if (filter_len[i] > 0) {\
			name[name_len] = '\0';\
			if (strstr(name, filter[i]) != NULL) {\
				force_include = 1;\
				break;\
			}\
		}\
		else { /* must call memagrep */\
			name[name_len] = '\n'; /* memagrep wants names to end with '\n': '\0' is not necessary */\
			if (((filter_len[i] == -1) && (filter[i][0] == '.') && (filter[i][1] == '*')) ||\
			    (memagrep_search(-filter_len[i], filter[i], len_current_dir_buf, current_dir_buf, 0, dummyout) > 0))\
			{\
				force_include = 1;\
				break;\
			}\
		}\
	}\
	name[name_len] = '\0'; /* restore */\
	if (toplevel) force_include = 1;\
}

	if (lstat(name, &stbuf) == -1) {
		if (IndexableFile) return 0;
		/* Can happen for command line arguments, not stuff obtained from fsize_directory() */
#if BG_DEBUG
		fprintf(LOGFILE, "cannot find %s -- not indexing\n", name);
#endif /*BG_DEBUG*/
		PROCESS_EXIT;
		return 0;
	}

	/* Else lstat has all the requisite information */
	if ((stbuf.st_mode & S_IFMT) == S_IFLNK) {
		if (IndexableFile) return 0;
		PROCESS_INCLUDE;
		if (!force_include) {
#if BG_DEBUG
			fprintf(LOGFILE, "%s is a symbolic link -- not indexing\n", name);
#endif /*BG_DEBUG*/
			PROCESS_EXIT;
			return 0;
		}
		/* The link was explicitly included: from here on work with its target. */
		if (-1 == stat(name, &stbuf)) {
#if BG_DEBUG
			fprintf(LOGFILE, "cannot find target of symbolic link %s -- not indexing\n", name);
#endif /*BG_DEBUG*/
			PROCESS_EXIT;
			return 0;
		}
	}
	else /* if (!IndexableFile) */ {
		/* Put exclude include processing here... stat all the time: that is faster than former! */
		if (FastIndex && ((fileindex = get_filename_index(name, name_list, file_num)) != -1)) {
			/* Don't process exclude/include if the file `name' is older than the index AND the exclude/include file is older than the index */
			if (IncludeHigherPriority) {
				if (!((stbuf.st_ctime <= istbuf.st_ctime) && (incstbuf.st_ctime <= istbuf.st_ctime)))
					PROCESS_INCLUDE;
				if (!force_include && !((stbuf.st_ctime <= istbuf.st_ctime) && (excstbuf.st_ctime <= istbuf.st_ctime)))
					PROCESS_EXCLUDE;
			}
			else {
				if (!((stbuf.st_ctime <= istbuf.st_ctime) && (excstbuf.st_ctime <= istbuf.st_ctime)))
					PROCESS_EXCLUDE;
				if (!((stbuf.st_ctime <= istbuf.st_ctime) && (incstbuf.st_ctime <= istbuf.st_ctime)))
					PROCESS_INCLUDE;
			}
		}
		else { /* Either AddToIndex or fresh indexing or previously excluded file: process exclude and include */
			if (IncludeHigherPriority) {
				PROCESS_INCLUDE;
				if (!force_include)
					PROCESS_EXCLUDE;
			}
			else {
				PROCESS_EXCLUDE;
				PROCESS_INCLUDE;
			}
		}
	}

	/* Here, the file exists and has not been excluded -- possibly has been included */
	if ((stbuf.st_mode & S_IFMT) == S_IFDIR) {
		if (-1 == fsize_directory(name, pat, pat_len, num_pat, inc, inc_len, num_inc)) return -1;
	}
	else if ((stbuf.st_mode & S_IFMT) == S_IFREG) { /* regular file */
		if (IndexableFile) {
			if (!filetype(name, 0)) printf("%s\n", name);
			return 0;
		}
		file_id ++;
		if (BuildDictionaryExisting) {
			/* Don't even store the names of the files that are not uncompressible */
			if (file_num >= MAXNUM_FILE) {
				fprintf(stderr, "Too many files in index: indexing the first %d only.\n", MAXNUM_FILE);
				return -1;
			}
			if (tuncompress_file(name, outname, TC_EASYSEARCH | TC_OVERWRITE | TC_NOPROMPT) <= 0) return 0;
			file_num++;
			name_list[ndx] = (char *) my_malloc(strlen(outname) + 2);
			strcpy(name_list[ndx], outname);
			size_list[ndx++] = stbuf.st_size;
			return 0;
		}
#ifdef SW_DEBUG
		printf("%s: ", name);
#endif
		if (AddToIndex || FastIndex) {
			if ((fileindex = get_filename_index(name, name_list, file_num)) != -1) {
				size_list[fileindex] = stbuf.st_size;
				if (FastIndex && (stbuf.st_ctime <= istbuf.st_ctime))
					disable_list[block2index(fileindex)] |= mask_int[fileindex % (8*sizeof(int))];
				else { /* AddToIndex or file was modified (=> its type might have changed!) */
					if (filetype(name, 1)) {
						if (!force_include) {
							remove_filename(fileindex, new_partition);
							return 0;
						}
						else {
#if BG_DEBUG
							fprintf(LOGFILE, "overriding and indexing: %s\n", name);
#endif /*BG_DEBUG*/
						}
					}
					disable_list[block2index(fileindex)] &= ~(mask_int[fileindex % (8*sizeof(int))]);
				}
			}
			else { /* new file not in filenames so no point in checking */
				if (filetype(name, 1)) {
					if (!force_include) return 0;
					else {
#if BG_DEBUG
						fprintf(LOGFILE, "overriding and indexing: %s\n", name);
#endif /*BG_DEBUG*/
					}
				}
				if (file_num >= MAXNUM_FILE) {
					fprintf(stderr, "Too many files in index: indexing the first %d only.\n", MAXNUM_FILE);
					return -1;
				}
				name_list[file_num] = (char *)my_malloc(strlen(name) + 2);
				strcpy(name_list[file_num], name);
				size_list[file_num] = stbuf.st_size;
				insert_filename(name_list[file_num], file_num);
				file_num ++;
				if (!OneFilePerBlock) {
					if (files_in_partition + 1 > files_per_partition) {
						if (new_partition + 1 > MaxNumPartition) {
							if (!printed_warning) {
								printed_warning = 1;
								if (AddToIndex) {
									/* The message used to end in a literal ".n"; it is meant to end the line. */
									fprintf(MESSAGEFILE, "Warning: partition-table overflow! Fresh indexing recommended.\n");
								}
								else {
									fprintf(MESSAGEFILE, "Warning: partition-table overflow! Commencing fresh indexing...\n");
									return -1;
								}
							}
						}
						else new_partition++;
						files_in_partition = 0;
						/* so that we don't get into this if-branch until another files_per_partition new files are seen */
					}
					p_table[new_partition] = file_num;
					files_in_partition ++;
				}
			}
		}
		else { /* Fresh indexing: very simple -- add everything */
			if (filetype(name, 1)) {
				if (!force_include) return 0;
				else {
#if BG_DEBUG
					fprintf(LOGFILE, "overriding and indexing: %s\n", name);
#endif /*BG_DEBUG*/
				}
			}
			if (file_num >= MAXNUM_FILE) {
				fprintf(stderr, "Too many files in index: indexing the first %d only.\n", MAXNUM_FILE);
				return -1;
			}
			file_num++;
			name_list[ndx] = (char *) my_malloc(strlen(name) + 2);
			strcpy(name_list[ndx], name);
			size_list[ndx++] = stbuf.st_size;
		}
	}
	return 0;
}
/*
 * fsize_directory: call fsize() on every entry of the directory `name'
 * except "." and "..".
 *
 * The buffer behind `name' is reused: a '/' and each entry name in turn are
 * written directly after the directory name, and the resulting path is
 * passed to fsize() with toplevel = 0.  pat, pat_len, num_pat, inc, inc_len
 * and num_inc are only forwarded to those fsize() calls.
 *
 * Returns -1 as soon as an fsize() call returns -1 (the directory handle is
 * closed first; `name' is left holding the path that failed).  Returns 0
 * otherwise, with `name' restored to the directory name.  A directory whose
 * name is too long or that cannot be opened is reported on stderr and
 * skipped with a return value of 0.
 *
 * NOTE(review): only DIRSIZE bytes are reserved by the length check, yet
 * entry names are copied without a bound; whether longer names fit depends
 * on the real size of the caller's buffer, which is not visible here.
 */
int
fsize_directory(name, pat, pat_len, num_pat, inc, inc_len, num_inc)
char *name;
char **pat;
int *pat_len;
int num_pat;
char **inc;
int *inc_len;
int num_inc;
{
#if DIRENT
	struct dirent *dp;
#else /*DIRENT*/
	struct direct *dp;
#endif /*DIRENT*/
	char *nbp, *nep;
	int i;
	DIR *dirp;

	nbp = name + strlen(name);
	if (nbp+DIRSIZE+2 >= name+BUFSIZE) { /* name too long */
		fprintf(stderr, "name too long\n");
		return 0;
	}
	if ((dirp = opendir(name)) == NULL) {
		fprintf(stderr, "permission denied or non-existent directory: %s\n", name);
		return 0;
	}

	*nbp++ = '/'; /* nbp now marks where each entry name starts */
	while ((dp = readdir(dirp)) != NULL) {
		if (strcmp(dp->d_name, ".") == 0 || strcmp(dp->d_name, "..") == 0)
			continue;
		for (i = 0, nep = nbp; dp->d_name[i] != '\0'; i++)
			*nep++ = dp->d_name[i];
		*nep++ = '\0';
		if (-1 == fsize(name, pat, pat_len, num_pat, inc, inc_len, num_inc, 0)) {
			closedir(dirp); /* do not leak the handle on the error path */
			return -1;
		}
	}
	closedir(dirp);
	*--nbp = '\0'; /* restore name */
	return 0;
}
/* One entry of a name_hashtable chain: a file name and the index stored with it. */
typedef struct _name_hashelement {
char name[MAX_LINE_LEN]; /* the file name, copied in by insert_filename() */
int index; /* value returned by get_filename_index() for this name */
struct _name_hashelement *next; /* next entry in the same bucket, or NULL */
} name_hashelement;
/* Chained hash table of file names; the bucket is chosen with hash4k(). */
name_hashelement *name_hashtable[MAX_4K_HASH];
/*
 * Looks `name' up in name_hashtable.  Returns the index stored with the
 * matching entry, or -1 if no entry carries that name.
 */
int
get_filename_index(name)
char *name;
{
	int bucket = hash4k(name, strlen(name));
	name_hashelement *cur;

	/* walk the chain of the bucket the name hashes to */
	for (cur = name_hashtable[bucket]; cur != NULL; cur = cur->next) {
		if (strcmp(name, cur->name) == 0)
			return cur->index;
	}
	return -1;
}
/*
 * insert_filename: record `name' with index `name_index' in name_hashtable.
 *
 * If an entry with the same name is already present, the table is left
 * unchanged (the existing index is kept).  A name that does not fit into the
 * fixed-size name field of a hash element is reported on stderr and not
 * inserted, instead of being copied past the end of that field.
 *
 * Always returns 0; callers in this file ignore the value.
 */
int
insert_filename(name, name_index)
char *name;
int name_index;
{
	int len = strlen(name);
	int index;
	name_hashelement **pe;

	/* sizeof does not evaluate its operand, so this is safe before pe is set */
	if (len >= (int)sizeof((*pe)->name)) {
		fprintf(stderr, "insert_filename: name too long, not hashed: %s\n", name);
		return 0;
	}

	index = hash4k(name, len);
	pe = &name_hashtable[index];
	/* find either an existing entry for this name or the end of the chain */
	while ((*pe != NULL) && (strcmp((*pe)->name, name))) pe = &(*pe)->next;
	if ((*pe) != NULL) return 0;

	*pe = (name_hashelement *)my_malloc(sizeof(name_hashelement));
	(*pe)->next = NULL;
	strcpy((*pe)->name, name); /* fits: length checked above */
	(*pe)->index = name_index;
	return 0;
}
/*
 * Empties name_hashtable and then inserts names[0..num-1], each with its
 * array position as the stored index.
 */
build_filename_hashtable(names, num)
char *names[];
int num;
{
	int bucket = 0;
	int pos = 0;

	/* start from an empty table */
	while (bucket < MAX_4K_HASH) {
		name_hashtable[bucket] = NULL;
		bucket++;
	}

	while (pos < num) {
		insert_filename(names[pos], pos);
		pos++;
	}
}
/*
 * Releases every element of name_hashtable with my_free(), subtracting each
 * element's size from memory_usage, and leaves all buckets NULL.
 */
destroy_filename_hashtable()
{
	int bucket;
	name_hashelement *cur, *doomed;

	for (bucket = 0; bucket < MAX_4K_HASH; bucket++) {
		for (cur = name_hashtable[bucket]; cur != NULL; ) {
			doomed = cur;
			cur = cur->next; /* step forward before the element is freed */
			memory_usage -= sizeof(name_hashelement);
			my_free(doomed, 0);
		}
		name_hashtable[bucket] = NULL;
	}
}